{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split \n",
    "import numpy as np\n",
    "import time\n",
    "import os\n",
    "import functools\n",
    "\n",
    "# Timing decorator\n",
    "def timer(func):\n",
    "    \"\"\"Decorate ``func`` so that each call prints how long it took.\n",
    "\n",
    "    The elapsed time in seconds is printed after ``func`` returns, prefixed with\n",
    "    the function name; the return value of ``func`` is passed through unchanged.\n",
    "    Nothing is printed when ``func`` raises.\n",
    "    \"\"\"\n",
    "    @functools.wraps(func)  # keep the wrapped function's __name__ and docstring\n",
    "    def wrapper(*args, **kwargs):\n",
    "        # perf_counter is monotonic, so the measurement is not distorted by\n",
    "        # system clock adjustments the way time.time() can be\n",
    "        start = time.perf_counter()\n",
    "        result = func(*args, **kwargs)\n",
    "        elapsed = time.perf_counter() - start\n",
    "        print(func.__name__+'运行时间：',elapsed)\n",
    "        return result\n",
    "    return wrapper"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 导入全量训练数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "@timer\n",
    "def load_data():\n",
    "    \"\"\"Load and return the object stored in '../semi_super/all_data.lz4'.\n",
    "\n",
    "    NOTE: joblib.load unpickles the file, so only point it at dumps that\n",
    "    come from a trusted source.\n",
    "    \"\"\"\n",
    "    return joblib.load('../semi_super/all_data.lz4')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_data运行时间： 46.006404399871826\n"
     ]
    }
   ],
   "source": [
    "train = load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separate the label column 'tag' from the features; missing feature values become -1\n",
    "y_train_df = train.loc[:, ['tag']].copy()\n",
    "x_train_df = train.drop(columns=['tag']).fillna(-1)\n",
    "\n",
    "# Plain numpy arrays for the estimators: flattened labels and the feature matrix\n",
    "x_train_arr = x_train_df.values\n",
    "y_train_arr = y_train_df.values.ravel()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### bagging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def SelectModel(model_name):\n",
    "    \"\"\"Build an unfitted classifier for one of the supported model keys.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    model_name : str\n",
    "        'XGB', 'RFC' or 'LGB'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    An unfitted XGBClassifier, RandomForestClassifier or LGBMClassifier.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``model_name`` is not one of the supported keys (previously this\n",
    "        surfaced as an UnboundLocalError on the return statement).\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    The boosted models read the notebook-level ``y_train_arr`` to derive\n",
    "    ``scale_pos_weight``, so the training labels must be loaded first.\n",
    "    \"\"\"\n",
    "    def _class_weight_ratio():\n",
    "        # (len(y) - sum(y)) / sum(y); this is negatives/positives when the\n",
    "        # labels are 0/1 - TODO confirm the label encoding\n",
    "        return float(len(y_train_arr)-np.sum(y_train_arr))/float(np.sum(y_train_arr))\n",
    "\n",
    "    if model_name == 'XGB':\n",
    "        from xgboost import XGBClassifier\n",
    "\n",
    "        # n_estimators is the sklearn-wrapper name for the number of boosting\n",
    "        # rounds; the former num_boost_round keyword is not a wrapper argument,\n",
    "        # so the default number of trees was trained instead of 500.\n",
    "        # early_stopping_rounds was dropped: the notebook calls fit() without a\n",
    "        # validation set, so early stopping could never be evaluated.\n",
    "        model = XGBClassifier(max_depth=6,\n",
    "                              learning_rate=0.04,\n",
    "                              booster='gbtree',\n",
    "                              objective='binary:logistic',\n",
    "                              scale_pos_weight=_class_weight_ratio(),\n",
    "                              eval_metric='auc',\n",
    "                              gamma=1,\n",
    "                              reg_lambda=1,\n",
    "                              subsample=0.9,\n",
    "                              min_child_weight=1,\n",
    "                              seed=2018,\n",
    "                              silent=False,\n",
    "                              n_jobs=24,\n",
    "                              n_estimators=500\n",
    "                             )\n",
    "    elif model_name == 'RFC':\n",
    "        from sklearn.ensemble import RandomForestClassifier\n",
    "        model = RandomForestClassifier(n_estimators=3000,\n",
    "                                       n_jobs=36,\n",
    "                                       max_features='sqrt',\n",
    "                                       class_weight='balanced',\n",
    "                                       random_state=2018)\n",
    "    elif model_name == 'LGB':\n",
    "        from lightgbm import LGBMClassifier\n",
    "        # n_estimators replaces the num_boost_round alias (same 500 rounds, but\n",
    "        # without the 'Found `num_boost_round` in params' warning);\n",
    "        # silent was previously misspelled as 'slient'.\n",
    "        model = LGBMClassifier(boost='gbdt',\n",
    "                    num_leaves=200,\n",
    "                    scale_pos_weight=_class_weight_ratio(),\n",
    "                    max_depth=-1,\n",
    "                    learning_rate=.04,\n",
    "                    max_bin=200,\n",
    "                    min_data_in_leaf=60,\n",
    "                    objective='binary',\n",
    "                    metric='auc',\n",
    "                    num_threads=32,\n",
    "                    silent=False,\n",
    "                    n_estimators=500)\n",
    "    else:\n",
    "        raise ValueError('unknown model_name {!r}; expected XGB, RFC or LGB'.format(model_name))\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py:102: UserWarning: Found `num_boost_round` in params. Will use it instead of argument\n",
      "  warnings.warn(\"Found `{}` in params. Will use it instead of argument\".format(alias))\n"
     ]
    }
   ],
   "source": [
    "# Fit every base learner on the full training arrays and dump it to ./model/<name>\n",
    "# makedirs(exist_ok=True) replaces the separate exists()/mkdir() pair, which could\n",
    "# fail if the directory appeared between the check and the creation\n",
    "os.makedirs('./model', exist_ok=True)\n",
    "model_list = ['XGB','RFC','LGB']\n",
    "for model in model_list:\n",
    "    print(model)\n",
    "    clf = SelectModel(model)\n",
    "    clf.fit(x_train_arr,y_train_arr)\n",
    "    joblib.dump(clf,'./model/{}'.format(model))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 预测valid的tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the validation feature blocks, dropping the id / raw date-time columns\n",
    "# NOTE(review): valid_date is read from preprocess_data/ here, while the ids used\n",
    "# further down come from preprocess_data_new/valid_date.csv - confirm that both\n",
    "# files describe the same rows in the same order\n",
    "valid_date = pd.read_csv('../../preprocess_data/valid_date.csv').drop(columns=['id','loan_hour'])\n",
    "valid_raw = joblib.load('../../preprocess_data_new/valid_nodup.lz4').drop(columns=['id','loan_dt'])\n",
    "valid_null = pd.read_csv('../../preprocess_data_new/valid_row_null.csv').drop(columns=['id'])\n",
    "maj_cnt_test = joblib.load('../../preprocess_data_discrete/maj_cnt_test.lz4')\n",
    "\n",
    "# Put the blocks side by side (aligned on the index) and fill missing values with -1\n",
    "valid = pd.concat([valid_date,valid_raw,valid_null,maj_cnt_test],axis=1).fillna(-1)\n",
    "x_test_arr = valid.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_score(save_path,model_list,uid,x_test_arr,model_dir='./model'):\n",
    "    \"\"\"Score ``x_test_arr`` with every saved model and write one CSV per model.\n",
    "\n",
    "    For each name in ``model_list`` the estimator stored at ``<model_dir>/<name>``\n",
    "    is loaded with joblib, column 1 of its ``predict_proba`` output is taken as\n",
    "    the score (presumably the positive-class probability) and an (id, score)\n",
    "    table is written to ``<save_path>/<name>.csv`` without the index.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    save_path : str\n",
    "        Output directory; created (including parents) when missing.\n",
    "    model_list : iterable of str\n",
    "        File names of the dumped models inside ``model_dir``.\n",
    "    uid : array-like\n",
    "        Ids written to the 'id' column; must have one entry per row of ``x_test_arr``.\n",
    "    x_test_arr : array-like\n",
    "        Feature matrix passed to ``predict_proba``.\n",
    "    model_dir : str, optional\n",
    "        Directory holding the dumped models; defaults to './model'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "    \"\"\"\n",
    "    # makedirs also handles nested output paths and an already existing directory\n",
    "    os.makedirs(save_path, exist_ok=True)\n",
    "    for model in model_list:\n",
    "        clf = joblib.load(os.path.join(model_dir, model))\n",
    "        pred = pd.DataFrame({'id': uid,\n",
    "                             'score': clf.predict_proba(x_test_arr)[:,1]})\n",
    "        pred.to_csv(os.path.join(save_path,'{}.csv'.format(model)),index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 参数\n",
    "valid_save_path = './valid_preds'\n",
    "model_list = ['XGB','RFC','LGB']\n",
    "valid_id = pd.read_csv('../../preprocess_data_new/valid_date.csv',usecols=['id']).values.ravel()\n",
    "\n",
    "predict_score(valid_save_path,model_list,valid_id,x_test_arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "valid_id = pd.read_csv('../../preprocess_data_new/valid_date.csv',usecols=['id']).values.ravel()\n",
    "model_list = ['XGB','RFC','LGB']\n",
    "valid_tag_name = 'valid_tag'\n",
    "valid_pred_dir = './valid_preds'\n",
    "\n",
    "def combine_score_to_tag(pred_dir,model_list,tag_name,uid,percentile=30):\n",
    "    \"\"\"Average the per-model scores and turn them into a 0/1 tag file.\n",
    "\n",
    "    Reads ``<pred_dir>/<model>.csv`` for every model, averages their 'score'\n",
    "    columns row by row and writes ``./<tag_name>.csv`` with the columns 'id' and\n",
    "    'tag'. A row is tagged 0.0 when its averaged score lies below the given\n",
    "    percentile of all averaged scores (or is exactly 0), otherwise 1.0.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    pred_dir : str\n",
    "        Directory holding one score CSV per model.\n",
    "    model_list : sequence of str\n",
    "        Model names; must not be empty.\n",
    "    tag_name : str\n",
    "        Base name of the CSV written to the current directory.\n",
    "    uid : array-like\n",
    "        Ids written to the 'id' column, one per score row.\n",
    "    percentile : float, optional\n",
    "        Percentile (0-100) used as the cut-off; defaults to 30.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.Series\n",
    "        The averaged scores before thresholding.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``model_list`` is empty.\n",
    "    \"\"\"\n",
    "    if not model_list:\n",
    "        raise ValueError('model_list must contain at least one model name')\n",
    "    score = None\n",
    "    for model in model_list:\n",
    "        pred_path = os.path.join(pred_dir,'{}.csv'.format(model))\n",
    "        model_score = pd.read_csv(pred_path)['score']\n",
    "        score = model_score if score is None else score + model_score\n",
    "    score = score/len(model_list)\n",
    "    # Rows below the cut-off percentile get tag 0, all others tag 1. An averaged\n",
    "    # score of exactly 0 is also tagged 0, as it was when 0 doubled as the marker\n",
    "    # value of the earlier in-place overwrite.\n",
    "    threshold = np.percentile(score,percentile)\n",
    "    is_low = (score < threshold) | (score == 0)\n",
    "    tag = pd.DataFrame({'id': uid, 'tag': np.where(is_low, 0.0, 1.0)})\n",
    "    tag.to_csv('./{}.csv'.format(tag_name),index=False)\n",
    "    return score\n",
    "valid_score = combine_score_to_tag(valid_pred_dir,model_list,valid_tag_name,valid_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### predict_tag_new和predict_tag的比较"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f2d70753080>"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEKCAYAAADpfBXhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xd4XPWd7/H3d0aj3qvV5Y4LbsiNBUyPExIggYQSCNyQkJCQssm2lJvdzebuXpKHJDeQQMiSBEggZBOKE0qophjbuHdbluWmLlll1Nv87h8z4hHCssbSaM6ZM9/X88zDlKOZ70HyRz/92hFjDEoppZzFZXUBSimlQk/DXSmlHEjDXSmlHEjDXSmlHEjDXSmlHEjDXSmlHEjDXSmlHEjDXSmlHEjDXSmlHCjGqg/Ozs42ZWVlVn28UkpFpG3btjUbY3LGO86ycC8rK2Pr1q1WfbxSSkUkETkezHHaLaOUUg6k4a6UUg6k4a6UUg6k4a6UUg6k4a6UUg6k4a6UUg6k4a6UUg6k4a6UUg6k4a6UUg5k2QpVNXGPbz5xxtdvXlkSpkqUUnalLXellHIgbbk7kLbslVLacldKKQfScFdKKQfScFdKKQfScFdKKQfScFdKKQfScFdKKQfScFdKKQcaN9xFJF5E3hWRXSKyT0T+/TTHxInIkyJSKSKbRaRsKopVSikVnGBa7n3ApcaYxcASYK2IrBp1zB1AqzFmFvAT4J7QlqmUUupsjBvuxq8z8NATuJlRh10DPBK4/yfgMhGRkFWplFLqrAS1/YCIuIFtwCzg58aYzaMOKQROAhhjBkWkHcgCmke9z53AnQAlJboEfizjbR+glFLjCWpA1RgzZIxZAhQBK0Rk4UQ+zBjzkDGm3BhTnpOTM5G3UEopFYSz2jjMGNMmIq8Da4G9I16qAYqBahGJAdKAUyGrUo2pwdvLkaZOTrR0k5UUy4Wzc4j3uK0uSyllsXHDXURygIFAsCcAV/DBAdN1wG3ARuB64DVjzOh+eRVibx1u4oW99QCkxMewu7qdzUdbuGJ+HiunZ1lcnVLKSsG03POBRwL97i7gj8aYv4rI94Gtxph1wMPAYyJSCbQAN05ZxQqADZXNvLC3noWFaXx44TQyEmOpaevhhT11PLuzlhiXi/NKM6wuUyllkXHD3RizG1h6mue/N+J+L/DJ0JamxrLteAvP7aljQUEqN5QX43b5JyYVpifw2Qum8+sNR1m3q4aijATyUuMtrlYpZQVdoRphuvsGeW5PHdOzk7hxecl7wT7MJcIN5cXExrh54t0T9A/6LKpUKWUlDfcI8+qhRvoGfFy9uOADwT4sJd7DDeXFNHX08fqhxjBXqJSyAw33CNLc0cfmqlMsL8sct7tlVm4yCwrT2FR1it6BoTBVqJSyCw33CPLCvno8bheXzcsN6viL5+TQN+hjU5XOSlUq2mi4R4gGby8H6rxcODuHlHhPUF9TkJ7AnLxkNlQ2a9+7UlFGwz1CbD3WgluEFdMzz+rr1szJpat/iG3HW6aoMqWUHWm4R4CBIR/bT7QxvyCV5LizWlTM9OwkSjMTebuyGV1XplT00HCPAPtrvfQMDLG87Oxa7cNWTM+ktXuAEy3dIa5MKWVXGu4RYMuxFjISPczISZrQ18/PT8XjFnaebAtxZUopu9Jwt7lTnX1UNXexvCwT1wS3yI/zuDlnWip7atoZ8mnXjFLRQMPd5nacbEOAZSWT2ydmSXE63f1DHG7sCE1hSilb03C3uYN1XkqyEklNCG7641hm5yWT4HFr14xSUULD3cbauvupbe9l3rTUSb9XjMvFuYVpHKjz0tU3GILqlFJ2puFuYwfqvIB/QDQUFhWnMTBkWH+oKSTvp5SyLw13GztQ10FOchzZKXEheb/SzCTiPS7W62ZiSjmehrtN9Q4MUdXcybwQtdoB3C5hdm4K6yua8OmsGaUcTcPd
pg41dOAzMC8/JaTvO3daCk0dfewPdPkopZxJw92mDtR5SYqLoTgzMaTvOyfP/8vi9YPaNaOUk2m425DPGA43dDI3L2XCC5fGkhwXw+KiNL2Ih1IOp+FuQw3eXnoGhpg5we0GxnPx3Fx2nGyjpat/St5fKWU9DXcbqmrqAvw7Ok6FS87JxRh467BOiVTKqc5u/1gVFlVNnWQmxZKeGDsl77+3pp2kWDe/2XCMrr4PXoLv5pUlU/K5Sqnw0Za7zQz5DEdPdTFjilrtAC4RZuQkc7S5S/d4V8qhxg13ESkWkddFZL+I7BORr53mmItFpF1EdgZu35uacp1vf62X3gEfM3KSp/Rzpmcn0d4zoP3uSjlUMN0yg8A3jTHbRSQF2CYiLxtj9o867i1jzEdDX2J02VjVDDDhvduDNfyXwdHmLrKSQ7MCVillH+O23I0xdcaY7YH7HcABoHCqC4tWG4+cIjs5jtQgL4I9UTkpcSTFxVDV3DWln6OUssZZDaiKSBmwFNh8mpdXi8guoBb4B2PMvklX51CPbz5x2ueHfIZ3jpxicXH6lNcgIszITnqv311CPJ9eKWWtoAdURSQZ+DPwdWPM6LXr24FSY8xi4D7gmTHe404R2SoiW5uadBreaHXtPfQN+qZ0MHUk7XdXyrmCCncR8eAP9t8bY54a/boxxmuM6Qzcfx7wiEj2aY57yBhTbowpz8nJmWTpzjN8AevSrPCE+8h+d6WUswQzW0aAh4EDxpgfj3HMtMBxiMiKwPueCmWh0eBkSzep8TGkTfKqS8HSfnelnCuYPve/A24F9ojIzsBz3wZKAIwxDwLXA3eJyCDQA9xodAL1WTvZ2hPyjcLORPvdlXKuccPdGPM2cMZ/9caY+4H7Q1VUNOrsG6Slq5+V0zPD+rnTs5PYU9NOa/cAmUlTsyJWKRV+ukLVJk4G+tuLMsLXcgcozfJ/3okW7ZpRykk03G3iZEs3LoHC9ISwfm5eajyxMS6On+oO6+cqpaaWhrtNnGjtZlqaP2jDySVCSUbiezN1lFLOoOFuAz5jqG7toTjMXTLDSrISqW/vpW/ggztEKqUik4a7DTR6++gf9FESxpkyI5VkJmLwz9ZRSjmDhrsNDA+mhnMa5EjFGYkIOqiqlJNouNvAydZuEjxusiyaipgQ6yY3NU773ZVyEA13G6ht66EwPcHSRUQlmUmcaOnGp2vPlHIEDXeLDfp8NHj7KEiPt7SO0sxEegd8NHX0WVqHUio0NNwt1ujtY8gYCsI8v3204cVMOt9dKWfQcLdYbZt/horV4Z6ZFEtirJuTrRruSjmBhrvFatp6iItxWb6vi4hQlJFAtYa7Uo6g4W6xuvZe8tPicdlgR8bijEQavX109g1aXYpSapI03C3kM4a69h7Lu2SGFWX4FzPtrm6zuhSl1CRpuFuoqaOPgSFDQZo9wr04w1/HrpPtFleilJosDXcL1bXbYzB1WGJcDFlJsew82Wp1KUqpSdJwt1BtWy8xLiEnJc7qUt5TlJGgLXelHEDD3UI1bT1MS4vH7bJ+MHVYcWYi9d5e6tt7rS5FKTUJGu4WMcODqTbpbx82fCUo7ZpRKrJpuFukrWeA3gEf+RZvOzBaflo8HrewU7tmlIpoGu4WGe72mJZqr3D3uF3Mz0/VlrtSEU7D3SL1Xn+459ks3AEWF6ezp7qdIZ/uEKlUpNJwt0h9ey8ZiR7iPW6rS/mAJcXpdPUPUdnYaXUpSqkJGjfcRaRYRF4Xkf0isk9EvnaaY0REfiYilSKyW0SWTU25zlHv7bVdl8ywxcXpgA6qKhXJgmm5DwLfNMbMB1YBXxaR+aOO+TAwO3C7E3ggpFU6zMCQj1OdfUxLs2e4T89KIjU+RgdVlYpg44a7MabOGLM9cL8DOAAUjjrsGuBR47cJSBeR/JBX6xBNHX34jD372wFcLmFxcTo7T+oeM0pFqrPqcxeRMmApsHnUS4XAyRGPq/ngLwAV8N5MGZu23MHf717R
0EF3v+4QqVQkCjrcRSQZ+DPwdWOMdyIfJiJ3ishWEdna1NQ0kbdwhHqvf9uBrCT7bDsw2pLidIZ8hr01E/pWK6UsFlS4i4gHf7D/3hjz1GkOqQGKRzwuCjz3PsaYh4wx5caY8pycnInU6wj13l5yU+Nste3AaDqoqlRkC2a2jAAPAweMMT8e47B1wGcCs2ZWAe3GmLoQ1uko9e29TEu117YDo2Unx+kmYkpFsJggjvk74FZgj4jsDDz3baAEwBjzIPA88BGgEugG/lfoS3WG5k7/lY6mpdq3S2bYkuJ0dpzQQVWlItG44W6MeRs4Y/+BMcYAXw5VUU52qL4DgGk22zDsdJYUp/PX3XU0dvSSm2LfwV+l1AfpCtUwOxgI97wIabmDXplJqUik4R5mhxs6SIx1kxwXTI+YtRYUpOF2iQ6qKhWBNNzD7HBjJ7kp8fjHqe0tIdbNOdNStOWuVATScA8jYwwVDR3kRkCXzLAlxensOtmGT3eIVCqiaLiHUYO3j47eQfJsdM3U8SwuTqejb5CqZt0hUqlIouEeRocb/YOpuTbdU+Z0lr63mEm7ZpSKJBruYVTR4G/95kZQy31GTjLJcTE6qKpUhNFwD6PDDR1kJHoiYqbMMLdLWFSUpoOqSkWYyEkZBzjc2MnsvBTbz5R5fPOJ9z32uF3sq23nkXeO4XG7uHlliUWVKaWCpS33MBmeKTM7N9nqUs5acUYCPgO1bT1Wl6KUCpKGe5gMz5SZk5didSlnrSgzEYDqVg13pSKFhnuYDM+UmZ0XeS331HgPaQkeTrZ2W12KUipIGu5hMjxTZnZu5LXcAYoyErTlrlQE0XAPk+GZMtnJsVaXMiHFGYm0dPXT2aeX3VMqEmi4h0mkzJQZS1Gmf4viau2aUSoiaLiHQSTPlBlWmJ6AoIOqSkUKDfcwaOyI3Jkyw+Ji3OSlxnOyRVvuSkUCDfcwqGiI3JkyIw0PqvovvKWUsjMN9zCI9Jkyw4ozE+kZGOLYKW29K2V3Gu5hUNkY2TNlhhVl+AdVdRMxpexPwz0MKhoie6bMsLzUeGLdLt1ETKkIoOE+xZwwU2aYS4SC9AR2nGyzuhSl1Dg03KeYE2bKjFScmcCBWi+9A0NWl6KUOoNxw11Efi0ijSKyd4zXLxaRdhHZGbh9L/RlRi6nzJQZVpqZSP+Qj7012jWjlJ0F03L/LbB2nGPeMsYsCdy+P/mynMMpM2WGlWQlAbD1uA6qKmVn416swxjzpoiUTX0pzjHyYhcv7q0jMdbNS/vqI35AFSA5LoYZOUlsPdYCa2ZaXY5Sagyh6nNfLSK7ROQFEVkQovd0hAZvH7kp8Y4I9mHlpRlsPd6Kz6eLmZSyq1CE+3ag1BizGLgPeGasA0XkThHZKiJbm5qaQvDR9maMobGjl9zUyLkgdjDKyzJp6x7gSFOn1aUopcYw6XA3xniNMZ2B+88DHhHJHuPYh4wx5caY8pycnMl+tO119A7SO+AjL8VZ4b68LBOALce0310pu5p0uIvINAn0OYjIisB7nprs+zpBQ0cvALmp8RZXElplWYlkJ8ey9XiL1aUopcYw7oCqiDwBXAxki0g18K+AB8AY8yBwPXCXiAwCPcCNRneWAvz97eBf2ekkIkJ5aSZbteWulG0FM1vmpnFevx+4P2QVOUijt5ekWDfJceP+b4445WUZvLivngZvr+N+eSnlBLpCdQo1eHsd1yUzrPy9fnftmlHKjjTcp4h/pkwfeQ6bKTNsQUEqibFu3j2q4a6UHWm4T5H2ngH6Bn3kpjiz5e5xuygvy+SdIzp2rpQdabhPEacOpo50/swsKhs7aQzMClJK2YeG+xQZDjyndssArJ6RBcCmKu2aUcpuNNynSIO3l5S4GBJjnTdTZtiCglRS4mLYqF0zStmOhvsUafD2ObpLBiDG7WLF9Ew2VWm4K2U3Gu5TwOfQPWVOZ/XMLI42d1Hfrv3uStmJhvsUaOseYGDIkOfQmTIjrQr0u2+s
ara4EqXUSBruU6DBO7ynjPNb7vPzU0lL8Gi/u1I2o+E+BRq9wzNlnN9yd7mEVTMy2VB5Ct1SSCn70HCfAg0dfaQleIj3uK0uJSwumpNDTVuP7u+ulI1ouE+BBm8vuQ7bw/1M1szx782//pDzL8CiVKTQcA+xIZ+hqcP50yBHKspIZFZuMm9UaLgrZRca7iF2oqWbQZ9x9MrU07l4Tg6bq1ro7h+0uhSlFBruIVfR0AHg2A3DxrJmbg79Qz5d0KSUTWi4h1hFfSDco6zlvmJ6JgkeN29ov7tStqDhHmIVjZ1kJHqIi4mOmTLD4mLcnD8zi/Xa766ULWi4h9jhho6o65IZtmZuDsdPdXO0ucvqUpSKehruITQw5KOqqSvqBlOHXXpOLgAv76+3uBKllIZ7CB0/1UX/kC+qpkGOVJSRyMLCVF7cq+GulNWcu9m4BSoa/Cs0nXpR7GGPbz4x5mv5aQm8vL+BBm9v1P6SU8oOtOUeQhUNHYhATnJ0dssALMhPBeClfdp6V8pKGu4hVNHQQUlmIrEx0fu/NTc1npk5Sbyo4a6UpcZNIRH5tYg0isjeMV4XEfmZiFSKyG4RWRb6MiNDRUMnc/JSrC7DcmsXTmNTVQutXf1Wl6JU1AqmiflbYO0ZXv8wMDtwuxN4YPJlRZ6+wSGONXcxJy/Z6lIst3ZBPkM+wysHGqwuRamoNW64G2PeBM50eftrgEeN3yYgXUTyQ1VgpDjc0MmgzzAv0OcczRYWplKYnsBfd9dZXYpSUSsUs2UKgZMjHlcHnvvAv2wRuRN/656SkpIQfLR97K/zAv4rE22qOtPvQucTEa5dWsAD64/4ryUbpYu61MSdaUZWMG5e6ax8mYiwjvwZYx4yxpQbY8pzcnLC+dFT7kCdlwSPm9KsJKtLsYWPLy3CZ2DdzlqrS1EqKoUi3GuA4hGPiwLPRZX9tV7OyU/B7RKrS7GFWbnJLC5K46ntUfejoJQthCLc1wGfCcyaWQW0G2OiqrPVGMOBOq/2t4/yiWVF7K/zcrDea3UpSkWdYKZCPgFsBOaKSLWI3CEiXxSRLwYOeR6oAiqBXwFfmrJqbaqmrQdv76CG+ygfW1xAjEt4WlvvSoXduAOqxpibxnndAF8OWUUR6ECdfw/3+Rru75OZFMvFc3N5ekcN//ihucS4o3dxl1Lhpv/aQuBAnRcROGeaLmAa7VPlRTR29PHKgUarS1Eqqmi4h8D+Wi9lWUkkxek+bKNdNi+PwvQEHt14zOpSVAQaGPLRNzBkdRkRSdMoBA7Ue1lQoF0yp+N2CZ9eVcIPXzzE4YYOZuv2DOoMBod8vH6oicffPUFdWw8tXf0YIDHWTXZyHCunZ7KoKF1npQVBW+6T1NE7wPFT3drffgY3lBcTG+Pi0Y3HrS5F2VTvwBAPvXmE8//va3z+0a0ca+5iWlo8l5yTy4cWTGNhQRq9A0P8z7ZqfvJKxXsXoldj05b7JB0KXBBbZ8qMLSs5jo8tKuCp7dX809q5pMR7rC5J2YQxhnW7avnhi4eoaevhwtnZ/ODahTR4+z7QOvcZw8E6Ly/tb+CRd45x7dJClpdlWlS5/WnLfZL21Qa2HdBumTO67fxSuvqHeHLLyfEPVlGhrr2Hz/52C1/7w04yk2J5/HMreeyOlVy5YNppu11cIswvSOOui2cyKzeZp3fU8NpBHagfi7bcJ2lXdRvZyXFM06sOndGionRWzcjkV29VccuqUuI9bqtLUhYxxvDNP+7iuT11+Izho4vyWTUji2Onujl2avw9ZeJi3HxmdRl/3l7NKwcayEqKZXFxehgqjywa7pO0u7qdxUVpiOgAz7CxNn2an5/GpqoW/rStmltWlYa5KmUH1a3dfOupPbx1uJnp2Ul8YmkhWRO4cpnbJVy3rIjWrn6e3lFDfnq8blA3inbLTEJn3yBHmjpZVKSthmDMzEmiOCOBB9YfYWDIZ3U5Kox8PsPvNh3nQz95k23HW7l6cQF3XDB9QsE+
zO0SblxRQoxbeHzzCfoH9WdqJA33SdhT3Y4xsKg4zepSIoKIcMk5udS09fD0Dt2SIFocP9XFp/97M999Zi9LSzL429cvYtWMLFwh+Gs3LcHDDcuLaero41W9OMz7aLhPwu7qNgAWFWq4B2tuXgoLClK5/7VK+gZ1cYqTDQ75+OUbR/jQT99kb007//WJc3nsjhUUZyaG9HNm56ZwXmkGG44009jRG9L3jmQa7pOwu6adwvSESf1pGW1EhH9aew4nWrp5TOe9O9a+2nY+/ot3+K8XDnLh7Bxe/sYablpRMmVjU1cumIbH7eK53XX4t7tSOqA6Cbur21isXTJnbc2cHC6ak8PPXj3MdcuKyEiKtbokFSJdfYPc91olD715hITYGG5aUcLCgtQpn7KYHBfD5fPyeG5PHQfrdYETaMt9wlq6+jnZ0qODqRP0nY/Mo7NvkJ+9dtjqUlQIGGN4dmcNl967ngffOMLS4gz+/vLZnFsYvplkq2ZkkZsSxwt76xjyaetdw32C3utvL9KW+0TMnZbCDcuLeWzjcSobtaUVyfbWtPPJBzfytT/sJDclnj/fdT7XnVdEYmx4OwbcLuHyeXk0d/bz1916eUftlpmg3dXtiMC5Opg6Yd+4Yi7P76nnW0/t4ck7V+PSzaAixuObT9DeM8CrBxrYdryVxFg3n1hayLLSjPe25LDC/IJU8lLjuO+1Sj66qCCqNxjTlvsE7a5uY0Z2ku6TMgk5KXF896p5bDnWyu/fndzV7lX4tHX388LeOu596RA7TrZx/swsvnHFXMrLMkMyvXEyXCJcMjeXysZOnt8TVVf7/ABtuU+AMYbtJ9q4ZG6u1aVEvOvPK+LZnbXc88JBLp+XS35agtUlqTF09w/ymw3HePCNI3T2DrK0JJ3Lzsmz3YD4wsI0Zh9v5b7XDnPVuflR+xehttwn4EhTJy1d/ayYnmF1KRFPRPjPj5/LoM/HP/1pNz4dCLOdgSEfj206zpofredHfzvEyulZfOWy2Vx/XrHtgh38rfe7L51FRUMnr0bxxmLacp+Ad4+2Auh2oxN0ur1nPrRgGs/urOWu32/nl7eeZ0FVarSBIR9Pb6/h/tcrOdHSzYqyTB68ZRnnlWaOuX+QXVx1bj73vHCQ32w4yhXz86wuxxIa7hOw5VgL2cmxTM9OsroUx1hRlsmRpi5e3l/PtuOtnFeqfxVZZWDIx1Pbq7n/9UpOtvRwbmEav7l9ORfPzYmYDfJi3C5uXV3GPS8e5GC9l3OmRd+W3BruE/Du0RaWl2VGzA96JBARPr6kkJrWbr76xA6evfvvyNaVv2E1HOr3vVZJdWsPhekJfGZ1KXPzUqhr7+WJdyNnL/7HN5/A4xY8buG7T+/lE8uK3vf6zStLLKosfDTcz1JNWw81bT187sLpVpfiOAmxbm5aUcLDbx/lC49t4/efW6n7vk+Rkd0qg0M+dpxoY31FI63dAxRlJHDb6lLm5KVEdAMmMTaGpcUZbD/RyocWTIu6C9gHNaAqImtF5JCIVIrIv5zm9dtFpElEdgZunwt9qfaw5WgLoP3tU6UoI5Eff2oJ24638q2n9ug+IVOod2CINyua+NFLh3h6Zw1JcTHctrqUu9bMZO601IgO9mGrZ2Yx6DNsOdZidSlhN+6vMhFxAz8HrgCqgS0iss4Ys3/UoU8aY+6eghpt5d1jLaTExeg1U6fQVYvyqWqaw70vV1CYnsA/fGiu1SU5SqO3lxf31rP56Cn6Bn3MzEni+vOKmJWT7IhAHykvNZ7ZuclsqjrFhbNzompRUzB/p6wAKo0xVQAi8gfgGmB0uEeFLUdbWFaaEVU/JFa4+9JZ1Lb3cP/rlSTEuvnyJbOsLiniHW3u4qE3j/DnbTUMDPlYUJjGmtk5FGY4e23B+TOzeGTjcfbWtEfV5fiCCfdCYORISjWw8jTHXSciFwEVwN8bYyJn9CVIrV39HG7s5NqlhVaX4ngiwg+uPZfu/iF+9LdDxMW4+NyF
M6wuK+L4fIa3K5t55J1jvHaoEY/bxfXlReSnxkfNVtWz81LITo5lw5FmDfcJ+AvwhDGmT0S+ADwCXDr6IBG5E7gToKQk8kar365sBmDVDO1vDwe3S7j3k4vpH/Txg+cO4O0d5O8vn+24roOp4O0d4M/bqnls43GqmrvITo7l7ktmcevqUnJT4m0/Tz2UXCKsnpnNX3bVcqKlm5IQXyzEroIJ9xqgeMTjosBz7zHGnBrx8L+BH57ujYwxDwEPAZSXl0fcSNn6Q02kJ3pYUqxzsMMlxu3ivpuW8u2n9/CzVw/T3NnH969eQIxbF1ePZoxh2/FWntxykmd31tI/5KM4I4FPlRexsCCNGLeLV/ZH54rNZSXpvLy/nneONFOSGXkNy4kIJty3ALNFZDr+UL8RuHnkASKSb4wZ3qXnauBASKu0AZ/P8EZFU9QNythBjNvFPdctIis5jgfWH+H4qS7uu2kZmTZc+m6Fpo4+nt5RzZNbTnKkqYukWDeLitJYMT2ToozoaKWOJy7GTXlpJu8caaZ94YDV5YTFuOFujBkUkbuBvwFu4NfGmH0i8n1gqzFmHfBVEbkaGARagNunsGZL7Kv10tzZx8VzcqLqT1orjPX/tzgjkeuWFfKX3XV87L63eeCWZVF7sZTOvkFe2lfPMztr2VDZzJDPcF5pBj+8biZXLcrn2Z26n/loq2ZksaGymc1Vp7jr4plWlzPlxKp5xOXl5Wbr1q2WfPZE3PfqYe59uYKt372cl/bpVdattKAglS/+bhtNHX189bLZfOnimVHRTdPVN8ibFU08t6eOv+2rZ2DIkJHoYXFROkuK08lNjbe6RNv73abjHDvVxfb/fUXELpATkW3GmPLxjouuJVuTsL6iiUVFabok3gYWF6fz4tcu4nvr9vLjlyt45UAD/3HNQkfOhGjq6OPVAw28tL+Btyub6R/0kZkUy3mlGSwuSqckM1EHmM/C+TOz2F/n5dmdNdyw3Nl97xruQWjr7mfHiVbu1rnWtpGW6OH/3biUK+bn8W/r9nPtLzbwqfOK+eaVcyK6BdvePcC7x1rYeOQUm6pOcaDeizFQlJHALStLuXJBHuWlGfxxa7XVpUak6dlJTEuN5zcbjvGp8mJH/2LUcA/Cm4ccwE0KAAAMvklEQVSb8RlYoxfnsJ2PLipgzZwcfvbqYX6z4RhP76zh5hUlfGHNDNtf+KOjd4CD9R3sr/Wyv9bL3tp29tf5wzzGJZRkJXLZOXnMy09hWmo8IkJVUxdVTV1Wlx6xRITzZ2bx1I4aNlad4vyZ2VaXNGU03IPw8v4GMhI9LHHgn/1OkBLv4TtXzefWVWX8/PVKfrfpOI9tOs4V8/K4eWUJ58/MCnuf/OObTzAw5KO9Z4C27gHauvtpDfy3rWeA/kEfNW097x2fmRTL/PxUvn7ZHFbNyORgfQeeKBhHsMLi4nTeqGjioTerNNyjWWffIC/vr+e6ZUU6BdImzjRbaXGxvx/a2zfA/2yt5sV99WQmxXLFvDwunpvD8umZIRk3Mcbg7RmkzttDTat/p9Ca1h6qA/890thJR9/g+75GgNQED+mJHpaXZXBzXgnz81OZX5BKbkrc+7oIjmjrfMp43C5uP7+Me1+u4ECd17H7RGm4j+OlffX0Dvh0y4EIkpEUy5cvncU3rpjD6wcbeWFvPc/tqePJrf4dMcqyEpmTl8LsvGTy0xLISYkjLcFDXIwLj9tFz8AQPf1DdPcP0TMwSHv3APXePhq8vdS199Dg7aOuvYfeAd/7Pjc2xkVhegKF6QnMnZZCeqKH9MRY0hM9ZCTEkprg+UADoa69l7r23rD9v1F+t64u5YE3jvDQm1X85IYlVpczJTTcx/HMzloK0xM4r0RXpUaauBg3axfms3ZhPv2DPvbWtrO5qoVdJ9s43NjBqwcbGQrymq1uEVITYkiN95Ca4KG8NJPUBA+p8TFkBAI8KS4Gl4MH6JwkPTGWm1aU8Nt3jvHNK+c4crGXhvsZ
NHX08fbhJr64ZmbUXkE9Uo3VdZOW4OGiOTk8eOt5DA75ONXVT6O3j47eAfoGffQP+UjwuNlQ2YzH7SI2xkW8x01irFuD22HuuGA6j7xzjIffPsq/fmyB1eWEnIb7Gfx1dy0+Ax/XLhnHGW+VcWmWXh/X6QrSE7h2aSGPbz7BF9fMJC+Cp9Cejg7Hn8EzO2uZn5/K7LwUq0tRSk2Br102G58x3PfaYatLCTkN9zHsPNnGrpNtXHde0fgHK6UiUnFmIjcuL+EP757kxKluq8sJKQ33MfzyjSOkxsdww/Li8Q9WSkWsr1w6ixi38NNXKqwuJaQ03E/jaHMXL+6r59bVpSRH2RXTlYo2uanx3HZ+GU/vrGFvTbvV5YSMhvtp/OqtKjxuF7edX2Z1KUqpMPjSmllkJcXynWf2Bj091u403Edp6ujjT9uquW5ZEbkpzho9V0qdXlqih+9eNZ9dJ9t4fPNxq8sJCQ33UX788iGGfIbPXzjd6lKUUmF0zZICLpiVzQ9fPESjN/JXDWu4j7D1WAtPvHuSOy6YzoycZKvLUUqFkYjwH9cupG/Ixz//eTe+CO+e0XAP6B/08e2n91CYnsDXL59tdTlKKQtMz07iu1fN4/VDTfxifaXV5UyKhnvAr96qoqKhk3+/egGJsTpDRqlodeuqUq5dUsC9L1fw9uFmq8uZMA134NUDDdz70iGuOjefy+fnWV2OUspCIsJ/fuJcZucmc/cT29lXG5nTI6M+3HdXt3H34ztYUJDGjz65yOpylFI2kBgbw8O3LScpNoabf7WZPdWRF/BRHe67q9v47G+3kpkUy8O3l2t3jFLqPcWZifzhzlUkx8Vw839virgumqgMd2MMT7x7gusf2EhcjItHPrtc57QrpT6gODORJ7+wirzUeG55eDP/57n99A0OWV1WUIIKdxFZKyKHRKRSRP7lNK/HiciTgdc3i0hZqAsNlX217Xz+0a1866k9rJyRyV++cgGzcnXXR6XU6RVlJPKXuy/gllUl/Oqto3z4p2/x1PZqBod843+xhcbthxARN/Bz4AqgGtgiIuuMMftHHHYH0GqMmSUiNwL3ADdMRcET4e0d4PWDjfxlVy2vHGgkNT6Gf157DndeNEOvi6qUGldCrJsfXHsul83L44cvHuIbf9zFT185zNWLC1i7cBoLClLfdw1cOwimk3kFUGmMqQIQkT8A1wAjw/0a4N8C9/8E3C8iYoyZklUAPp9hwOdjcMgwOOS/3zswREfvIN6eARo6+qhr6+FIUyf7ar1UNHQwMGTISYnjq5fO4o4LZ5CW4JmK0pRSDnbJ3FzWzM7hlQMN/PadYzzwxhHuf72StAQP5xamMS8/hcL0BArSE0hN8JAcF0NyXAxJcTEkxbmJcblwuwSXMOW/DIIJ90Lg5IjH1cDKsY4xxgyKSDuQBYR8BOKvu2u5+/EdQR2bmRTLgoJUPn/hDC6bl8fS4nS9XJ5SalJcLuHKBdO4csE0Wrr6efVAA9tPtLGnpo1HNx6nb3D87povrJnBtz48b0rrDOv0EBG5E7gz8LBTRA5N5ecdB4L7NRCUbKbgl1WYOeEcwBnn4YRzgAg9j09/8Kmwnse374FvT/zLS4M5KJhwrwFGXrGiKPDc6Y6pFpEYIA04NfqNjDEPAQ8FU5jdiMhWY0y51XVMhhPOAZxxHk44B9DzsLNgZstsAWaLyHQRiQVuBNaNOmYdcFvg/vXAa1PV366UUmp847bcA33odwN/A9zAr40x+0Tk+8BWY8w64GHgMRGpBFrw/wJQSillkaD63I0xzwPPj3rueyPu9wKfDG1pthOR3UmjOOEcwBnn4YRzAD0P2xLtPVFKKeeJyu0HlFLK6TTcR3HCVgtBnMM3RGS/iOwWkVdFJKipVeE23nmMOO46ETEiYrvZDsGcg4h8KvD92Ccij4e7xmAE8TNVIiKvi8iOwM/VR6yo80xE5Nci0igie8d4XUTkZ4Fz3C0iy8JdY0gZY/QWuOEf
MD4CzABigV3A/FHHfAl4MHD/RuBJq+uewDlcAiQG7t9lt3MI9jwCx6UAbwKbgHKr657A92I2/uUYGYHHuVbXPcHzeAi4K3B/PnDM6rpPcx4XAcuAvWO8/hHgBUCAVcBmq2uezE1b7u/33lYLxph+YHirhZGuAR4J3P8TcJnYa1OJcc/BGPO6MaY78HAT/rULdhPM9wLgP/DvZWTHKxoHcw6fB35ujGkFMMY0hrnGYARzHgZIDdxPA2rDWF9QjDFv4p/NN5ZrgEeN3yYgXUTyw1Nd6Gm4v9/ptlooHOsYY8wgMLzVgl0Ecw4j3YG/tWI3455H4M/mYmPMc+Es7CwE872YA8wRkQ0isklE1oatuuAFcx7/BtwiItX4Z9Z9JTylhdTZ/tuxNb06RRQTkVuAcmCN1bWcLRFxAT8Gbre4lMmKwd81czH+v6DeFJFzjTFtllZ19m4CfmuMuVdEVuNf97LQGGPvfXEdTFvu73c2Wy1wpq0WLBTMOSAilwPfAa42xvSFqbazMd55pAALgfUicgx/H+k6mw2qBvO9qAbWGWMGjDFHgQr8YW8nwZzHHcAfAYwxG4F4/Pu1RJKg/u1ECg3393PCVgvjnoOILAV+iT/Y7djHC+OchzGm3RiTbYwpM8aU4R87uNoYs9Wack8rmJ+nZ/C32hGRbPzdNFXhLDIIwZzHCeAyABGZhz/cm8Ja5eStAz4TmDWzCmg3xtRZXdSEWT2ia7cb/hHzCvyzA74TeO77+IMD/D+0/wNUAu8CM6yueQLn8ArQAOwM3NZZXfNEzmPUseux2WyZIL8Xgr97aT+wB7jR6poneB7zgQ34Z9LsBK60uubTnMMTQB0wgP8vpjuALwJfHPG9+HngHPfY8efpbG66QlUppRxIu2WUUsqBNNyVUsqBNNyVUsqBNNyVUsqBNNyVUsqBNNyVUsqBNNyVOguBVclK2Z6Gu3I8EUkSkedEZJeI7BWRG0RkuYi8E3juXRFJEZF4EfmNiOwJ7Et+SeDrbxeRdSLyGvBq4Ll/FJEtgX2//93SE1TqNLQVoqLBWqDWGHMVgIik4d9D/QZjzBYRSQV6gK8BxhhzroicA7wkInMC77EMWGSMaRGRK/Hv/7IC/6rGdSJykfFvKauULWjLXUWDPcAVInKPiFwIlAB1xpgtAMYYr/Fv33wB8LvAcweB4/j3egF42RgzvBf4lYHbDmA7cA722+xLRTltuSvHM8ZUBPZ+/wjwA+C1CbxN14j7AvyXMeaXoahPqamgLXfleCJSAHQbY34H/AhYCeSLyPLA6ymBgdK3gE8HnpuDv4V/6DRv+TfgsyKSHDi2UERyp/5MlAqettxVNDgX+JGI+PDvCHgX/tb3fSKSgL+//XLgF8ADIrIHGARuN8b0jb6KojHmpcC2thsDr3UCtwB23T5ZRSHdFVIppRxIu2WUUsqBNNyVUsqBNNyVUsqBNNyVUsqBNNyVUsqBNNyVUsqBNNyVUsqBNNyVUsqB/j+4VTBceSx2JwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# predict_tag_new\n",
    "import seaborn as sns\n",
    "%matplotlib inline\n",
    "sns.distplot(valid_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# predict_tag: average the per-model validation scores found in the predict_tag folder\n",
    "old_pred_dir = '../predict_tag/valid_preds/'\n",
    "# Drop Jupyter's checkpoint folder up front: it must neither be read as a CSV\n",
    "# (which happened when it was listed first) nor be counted in the divisor of\n",
    "# the mean. Sorting makes the processing order deterministic.\n",
    "files = sorted(f for f in os.listdir(old_pred_dir) if f != '.ipynb_checkpoints')\n",
    "assert files, 'no prediction files found in ' + old_pred_dir\n",
    "score = None\n",
    "for f in files:\n",
    "    pred = pd.read_csv(old_pred_dir+f)\n",
    "    score = pred.score if score is None else score + pred.score\n",
    "\n",
    "score = score / len(files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f2d8804d3c8>"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAEKCAYAAAAyx7/DAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xl8nNV97/HPb7Rbu7XZlmTLuzF2vCAv7LsDNAGysd8LCSlNmqRpQ5v2drlNmqa3uW0DDWTj0oQEMISEEJwACRgDYfMib3i3vMqWba2WtUujmXP/GNk1RLZGskYzz8z3/XrNS6OZR8/8jkf6+sx5znMec84hIiLe4Yt2ASIiMjQKbhERj1Fwi4h4jIJbRMRjFNwiIh6j4BYR8RgFt4iIxyi4RUQ8JjmcjczsANAGBIA+51xlJIsSEZEzCyu4+13pnGsMZ8PCwkJXUVExvIpERBLQ+vXrG51zReFsO5TgDltFRQVVVVWR2LWISFwys4PhbhvuGLcDXjaz9WZ23xle9D4zqzKzqoaGhnBfX0REhijc4L7EObcQuB74gpld9sENnHOPOOcqnXOVRUVh9fZFRGQYwgpu51xt/9d64DlgcSSLEhGRMxs0uM0s08yyT94HlgFbI12YiIgMLJyDkyXAc2Z2cvvlzrnfRrQqERE5o0GD2zm3D5g3CrWIiEgYdOakiIjHKLhFRDxGwS0i4jEROXNSRORcLF9TE/a2dyyZGMFKYpN63CIiHqPgFhHxGAW3iIjHKLhFRDxGwS0i4jEKbhERj1Fwi4h4jIJbRMRjFNwiIh6j4BYR8RgFt4iIxyi4RUQ8RsEtIuIxCm4REY9RcIuIeIyCW0TEYxTcIiIeo+AWEfEYBbeIiMcouEVEPEbBLSLiMQpuERGPUXCLiHiMgltExGMU3CIiHqPgFhHxGAW3iIjHKLhFRDxGwS0i4jEKbhERjwk7uM0sycw2mtlvIlmQiIic3VB63F8GdkSqEBERCU9YwW1mZcAfAY9GthwRERlMuD3uB4GvAsEzbWBm95lZlZlVNTQ0jEhxIiLyhwYNbjP7CFDvnFt/tu2cc4845yqdc5VFRUUjVqCIiLxfOD3ui4EbzewA8DRwlZk9EdGqRETkjAYNbufc/3LOlTnnKoDbgFXOubsiXpmIiAxI87hFRDwmeSgbO+deB16PSCUiIhIW9bhFRDxGwS0i4jEKbhERj1Fwi4h4jIJbRMRjFNwiIh6j4BYR8RgFt4iIxyi4RUQ8RsEtIuIxCm4REY9RcIuIeIyCW0TEYxTcIiIeo+AWEfEYBbeIiMcouEVEPEbBLSLiMQpuERGPUXCLiHiMgltExGMU3CIiHqPgFhHxGAW3iIjHKLhFRDxGwS0i4jEKbhERj1Fwi4h4jIJbRMRjFNwiIh6j4BYR8RgFt4iIxyi4RUQ8RsEtIuIxgwa3maWb2Voz22xm28zs66NRmIiIDCw5jG16gKucc+1mlgK8ZWYvOedWR7g2EREZwKDB7ZxzQHv/tyn9NxfJokRE5MzCGuM2syQz2wTUA68459YMsM19ZlZlZlUNDQ0jXaeIiPQLK7idcwHn3HygDFhsZnMG2OYR51ylc66yqKhopOsUEZF+Q5pV4pxrAV4DrotMOSIiMphwZpUUmVle//0M4FpgZ6QLExGRgYUzq2Q88BMzSyIU9M84534T2bJERORMwplV8h6wYBRqERGRMOjMSRERj1Fwi4h4jIJbRMRjFNwiIh6j4BYR8RgFt4iIxyi4RUQ8RsEtIuIxCm4REY9RcIuIeIyCW0TEYxTcIiIeo+AWEfEYBbeIiMcouEUkZnT7A/zorf08vKqaF947QmuXP9olxaRwLqQgIhJxb+xu4C9/vpmGth7G5aTz7r4mVu9v5przSrh8hq5jezoFt4hEXU1TJ19cvoEJuRk8fPsC9jZ00NzRywtbjvLytmNUFIxhUkFmtMuMGRoq
EZGo6vYH+NPl6zHg0bsrWTKlAICxmancUllGbkYKv9xQS18gGN1CY4iCW0Si6psv7GBrbSv/cct8yseOed9zaclJ3LyglIb2Hl7b1RClCmOPgltEomb7kVYeX32Qz1w8mWtnlwy4zYySbOaX5/HG7nqaO3pHucLYpOAWkaj59iu7yElP5svXTD/rdstml+AcrD94fJQqi20KbhGJig01x1m5o54/uXwquRkpZ902b0wq04qz2FBznKBzo1Rh7FJwi0hU/MfLuyjITOWeiyrC2v6CSfmc6PKzt749soV5gIJbREbdmn1NvL2nic9fMZXMtPBmJc8en0NGShJVGi5RcIvI6PvR2/vJH5PCXUsnhf0zyUk+5k/MY/vRVjp7+iJYXexTcIvIqDrU3Mkr2+u4ffFE0lOShvSzlZPyCQQdmw63RKg6b1Bwi8ioenz1QcxsSL3tk8bnZlCcncaOo60RqMw7FNwiMmo6e/t4em0N180Zx4S8jGHtY0ZJNgeaOunpC4xwdd6htUpEZFQsX1PDmv1NtHb3UZaXwfI1NcPaz4ySbN7a08i+hg7OG58zwlV6g3rcIjIqnHOs2dfMhNx0Jn7g1PahqCgYQ0qSsbuubQSr8xYFt4iMikPHuzjW2s3iyQWY2bD3k5zkY2pRFrvr2nAJejKOgltERsXa/c2kJvuYV5Z7zvuaUZLN8U4/Te2JuXaJgltEIu5El58ttS3MK8sjbYhTAAcyoyQbgN31iTlcouAWkYh7flMt/oBjccXYEdnf2MxUCjJTE3ace9DgNrNyM3vNzLab2TYz+/JoFCYi8cE5x/I1NUzIS6c0f3hTAAcyoySb/Y0d9PYl3gUWwulx9wH3O+dmA0uBL5jZ7MiWJSLxYuOhFnYea2PRCPW2T5pcmIk/4Nh65MSI7tcLBg1u59xR59yG/vttwA6gNNKFiUh8eHJ1DZmpScwvyxvR/U4qCE0prDrQPKL79YIhjXGbWQWwAFgTiWJEJL6c6PTzm/eOcNOC0hE5KHm67PQUCjJTWXcg8VYLDDu4zSwLeBb4c+fcHywUYGb3mVmVmVU1NOjacCICz244TE9fkDsWT4zI/isKMqk60EwwmFjzucMKbjNLIRTaTzrnfjnQNs65R5xzlc65yqKiopGsUUQ8yDnH8rU1zCvPY07puc/dHsikgjEc7/SzrzGxLq4QzqwSA/4L2OGc+3bkSxKReLB2fzN76tu5c0lketsQ6nEDCTdcEk6P+2LgfwBXmdmm/tsNEa5LRDxu+doastOT+eiHJkTsNQqyUvvHuRPrAOWgqwM6594Chr+wgIgknOaOXl7acow7lkwkI3VkD0qezsyorMinSj1uEZFz84v1h+gNBLkjgsMkJy2qGEtNcyd1rd0Rf61YoeAWkREVDIbOlFxUkX9qTZFIOnliTyINlyi4RWREvbO3iQNNndy5ZOiXJhuO88bnkJrsY/OhxLkOpYJbREbU8rUHyR+TwnVzxo3K66Um+zh/Qg6bDyXOqe8KbhEZMbUtXfxuWx2fqiwf8hXcz8X88jy21J6gL5AYC04puEVkxPzknQMA3H1Rxai+7vzyPLr8AXbXJcaJOApuERkRbd1+nlpTww1zx1M6zCu4D9e8/gWsNh9OjHFuBbeIjIifrTtEW08f914yedRfe1LBGPLGpLCpRsEtIhKWvkCQH799gEUV+cwvH9nlW8NhZswry1OPW0QkXC9sOUptSxf3XjIlajXMK89jd10bHT19UathtCi4ReScBIKOh1btYXpxFstml0StjvnluQQdbK2N/2mBCm4ROScvbjnKnvp2/uzq6fh80VvW6OQByk0JcCKOgltEhi0YdHzn1WqmF2dxw9zxUa2lICuN8rEZCTHOPejqgCIiZ/K3z22hur6dWxeV87N1h6JdDvPK8tiYADNL1OMWkWHxB4Ks3FFPUXYacyN0hZuhml+eR21LF/Vt8b1SoIJbRIZl+ZoaGtt7uP78cfgsNpbsPzkVMd7X
LVFwi8iQnej08+DK3UwtymTmuMgv3Rqu8yfkkuSzuF8pUGPcZ7F8TU3Y247GgvEiseLh16pp6fJz19JJWIz0tgEyUpOYWZId9zNL1OMWkSHZU9/OY+8c4JYLyhmfO7prkoRj/sTQGZTBoIt2KRGj4BaRsAWDjr959j3GpCbzlx+eGe1yBjS/LI+27j72N3VEu5SIUXCLSNieWHOQqoPH+YePzKYoOy3a5Qxo3qkDlPE7XKLgFpGwHGnp4lsv7eTS6YV8YmFptMs5o2nFWWSmJsX1OLeCW0QGFQg6vvLMJoIO/uVjc2PqgOQHJfmMuWW56nGLSGJ7aFU1q/c1842b51A+dky0yxnUvPI8th9tpacvEO1SIkLTAUeIpg5KvHp3bxPfebWajy8s5ZMXlEW7nLDML8vDH3BsP9LKgon50S5nxKnHLSJnVNvSxZ89vZGKwky+cdOcaJcTtvkT4/sApXrcIjKgtm4/9z62ju7eAMs/u4TMNO/ExbicdIqz09h8OD5PfffOOyEio6YvEORLT22kur6dxz69iOklsXNaezjMjHnleXHb49ZQSQR1+wPUtnTR1N5Db18w2uWIhCUQdHz1F+/x+q4Gvn7j+Vw6vSjaJQ3L/PI89jV2cKLTH+1SRpx63COsrdvPm9WNbD/aSnNH7/ueK8hMZfaEHGZPyInKBVVFBhMMOv7uuS38cmMtX7l2BnctnRTtkobt1EqBh1u4bIY3//M5EwX3CAkEHa/urOPtPY30BRyzxudQOSmfwqw0egNB2rr87Gvs4J09TbxZ/TaLKvL5wpXTuHxGUUzPiZXEEQg6/v5XW3l63SG+eOU0/uzq6dEu6ZzMLQutEb75kIJbBtDtD/DU2hqq69v5UFku18wqoXCA04EvnwldvQGSfPDI7/dxz4/XsXjyWL720fOZPSEnCpWLhPT0BfiLn23ixS3HuHxGEeNz04c0xTUW5aSnMLUoMy7PoFRwn6PWLj+PvXOA+rZuPr6glMqKsWfdPiM1iTuWTOSOJZN4puoQ335lNx956E3uWjqJv/rwTLLTU0apcpGQE51+PvfEet7d18QNc8ZxiUfHtAcyvzyfN3bX45yLq0+2Cu5z0NsX5PHVB2nu7OXuiyqYXhzekfeTPRmfGV+4Yhord9Tx+LsHeX7TEW6aN4FZ4/+7962TdSSSquva+OOfVlHb0sUDt86jqze+DqLPL8/l2Q2HqW3poiw/9s/4DNegs0rM7EdmVm9mW0ejIK9wzvHLjYc50tLFrZXlYYf2B2WkJvHReRP43OVTSU/x8dPVB3l6XQ3tPX0jXLHI+7205Sg3f/dt2nsCPH3fUj62wBtnRQ7FvDi9lFk40wEfA66LcB2e88buBt47fIJls0s4b/y5j0+Xjx3DF66cxtXnFbOttpUHV+5mY81xnIvfxeAlOrr9Af7+V1v4/JMbmFaSza+/dDEXTDr7EJ9XzRqXQ2qyj401x6NdyogadKjEOfd7M6uIfCnecaSli5U76phbmjuiR6uTfT6unlXCnAm5PLexlp+vP0x9Ww/f/NicuPqYJ6Pr9IOMtS1d/GL9Iepae7h0WiHXnl/CazsbolhdZKUm+5hXlkvVwfgKbp2AM0SBoOPZDYcZk5rMTfMnROSAR0lOOvddNoWPfGg86w40s+yB3/Pjt/cTiONLMUlk9QWCrNxRx/df30Nnb4C7L6zg+rnjSfbFfwQsqhjL1toTdPbGz/DjiL1rZnafmVWZWVVDQ/z+D/5WdQNHT3Rz47wJjEmN3LFdnxkXTS3k5b+4jEUVY/n6r7fzyR+8Q3VdW8ReU+LT7ro2vrOqmlU76/lQWR5/fvWMmLoye6QtqhhLX9DF1bTAEQtu59wjzrlK51xlUVH8TCc6XVN7D6/urOf8CTnMKc0dldcsyx/DY59exAO3zuNAYwc3fOdNHly5W6fQy6AONXdy30+reOydAzgH91xUwS2V5WSkJkW7tFG1cGI+ZlB1IH6GSzQdcAh+u+0Y
PjM+Om/CqL6umfGxBWVcOr2Ir/96Ow+urGbF5iP89XWzWDa7JK7mp8q56+jp49E39/O91/fgM2PZ7BIumVZIclL8D4sMJHdMCjNLsll3oDnapYyYcKYDPgW8C8w0s8Nmdm/ky4o9Bxo72HaklctmFJETpZNkCrPSeOj2BfzonkoA/uTx9XzqB++y/mD8/ELK8HX1BvjhG3u59P++xgMrd3P1ecWsvP9yrphZnLChfdKiirFsOHicvkB8fFINZ1bJ7aNRSCxzzvHi1qPkpCdzybTCaJfDVbNKuGx6Ec9UHeaBlbv5xPffZdnsEu5fNjOhxi4lpNsf4Mk1NXz/9b00tvdw6fRC/uLaGSyMwyu/DFdlRT6Prz7IzmNtozbMGUkaKgnDltoTHD7exScWlpGaPLo9l8HWi/jCFdN4a08jb+xu4OXtdVwxs4j7LpvChVMKNIQS5+pau3ly9UGWr62hsb2Xi6YW8P27FrJokGUXEtHJf5N1B5oV3IkgEHSs3FHHuJx0FkyMvaVYU5N9XDWrmKWTx7J6fzObDh3njv+3hrmludx32RSumzOOlAT/mBxPnHNsqGnhsXcO8NKWowSc46qZxXz20ilcOLUg2uXFrAl5GZTmZVB14DifvnhytMs5ZwruQbx3uIXG9l7uWDwRXwz3YMekJXPVrGIevmMBv9xQy6Nv7uNLT22kMCuVm+eX8qnKcg2jeFhtSxcrNh3h+U217DzWRnqKjyWTx7J0SgEFWWnsb+xgf2NHtMuMaYsq8nl7b1NcLDil4D6LoHO8tquecTnpnll2NT0ltPrgbYvKeW1XPc9UHeKxdw7w6Fv7mVuayycvKOP6OeMozkmPdqkyiMb2Hl7eVsevNtWydn/oAPTCiXl84+Y59AWCpCUn1rS+c3Xh1AJ+tekI1fXtzPDYpdg+SMF9Fl7pbQ/E5zOuPq+Eq88roam9h+c3HeEX6w/zjyu28Y8rtrFgYh7LZo9j2fklTC3Kina5QmgYZNuRVlbtrGfVzno2H27BOZhalMn9187gpvmlTCwILX3g9bWyo+HkcrVvVjcquONVIOhYtbPBU71tOPMfdHpKEnctnURdazfbj7ay/Ugr3/rtTr71251MKczk0umFXDytkKVTC6I23THROOc42NTJ6n1NrNnfzDt7G6lr7cEM5pXl8ZVrZnDVecXMHp/j+Y/2saA0L4MphZm8Vd3AvZd4e5xbwX0Gv3nvCI3tPdzuwd722ZTkpFOSk86VM4u5YmYRK3fU8eqOep6pOsxP3j1Iks+YV5bLRVMLqazIZ+GkfAX5COn2B9h+tJXNh1rYWNPC2v3NHGvtBkJz9MfnpnPptCJmjMsmKy30p7n50Im4W5I0mi6ZXsjPqw7T0xfw9FCTgnsAgaDjP1+tpiQnjfM91Nseqtd3NZDs8/Hh88dx9axiapo72dPQzp76dr73+h6CDoxQ2F8zu5hFFWO5YFI+pXkZ6gEOoq3bz65jbew41sbOo61sqT3BjqOt+AOhhcJKctJYVBE6uLh0SgFTizJ5au2hKFcd/y6dXsRP3z3IhoMtnp6Fo+AewG/eO8K+ho64622fTXKSjylFWUwpymLZ7NA1CA81d3GwuYODTZ08U3WYJ1aHhmFy0pNPTa8qzQ99Pf2Sa4ly1Z5uf4CjJ7o50NTBgcbQv9P+xg721LdT29J1arv0FB/jczO4cEohZfkZlI8dQ27Gf/97rd3ffOrgo0TW0iljSfIZb+1pUHDHk0DQ8Z1Xq5lRkhXXve3BpCUnMa04i2nFoQOXgaCjrrWbg00d1DR3UtvSza5jbZxcaPZkmE/IyyA7PZmpRVlMLsyMuQWNgkFHbyBIjz9ITyBAjz9IbyBIb1+Qnr6TXwP09gVp6+7jRJefE11+Wrv8tHT5qW/roe5EN3Vt3bR0+t+376y0ZCoKx7BwUj53LJnIeeOz2Xm0jdyMFH1CiRHZ6SksKM/jzepG/urD0a5m+BTcH/DClqPsbejg4TsW0NoV
P+v3nqskn50K5gunhh7r8Qc4cqKb2pYujrR0UXu8i13H2li1s/7Uz5XmZVBROIbi7HSKs9Moyk6jOCedvIwU0lOSSE/xhb72jzf2BYP0BR19AUdfMEi3P0i3P0CXPxD62nvyfvB9j71vG3/o04I/EOy/OXr7gvQFg6eGKoYqIyWJvDEpFGen4fMZM0uyyclIITc9hYKsVAqy0shMTXpfQB870UPemNTh/6NLRFwyvZD/fLWa4x295Gd68/1RcJ/mZG97enEWN8wZz9PrNOZ4NmkpSUwuzGRyYeapx3r7giyePJZ9je3sre9gX2M7Nc2drN3fTENbD70jvMhPSpKRkuQ77Rb6PjXJR2ZqEsn991OST9vOZyQn+UjyGSlJRpLPR7LPSE4ykk+7n5acREZq6D+XRLjgQKK4dHoRD66s5s09jdw4yit9jhQF92le3HKUPfXtPHT7Anw+fbQdjtRk36kF64v6e9hLJofGEp1zdPkDtHX30dUbwB8McvHUQrr7Qj1ooD9ILRSevlBvPCM19HXl9npSkqw/iEMBqyEIGar55XkUZqXxu63HFNxeFzy9tz13fLTLiUtmxpjU5PddOai+red925zt6lJF2WmRKk0SSJLPuG5OCc+ur6WrNxBzx2HCoc9//V7cepTq+na+dPV0ktTbFolrN8wZT5c/wBu76wffOAYpuOmft72ymmnFWfyRetsicW/x5LGMzUzlxS3Hol3KsCi4gec21lJd385Xrp2h3rZIAkhO8vHh80t4dUcd3f5AtMsZsoQP7p6+AA+8spu5pblcP2dctMsRkVFy/ZzxdPQGeLO6MdqlDFnCB/eTq2uobenir6+bpRkKIgnkwqkF5Gak8MJ7R6JdypAldHC39/Tx8Gt7uHhaAZdMj/61JEVk9KQk+fjovPG8tPUYxzt6o13OkCR0cD/0ajXNHb189cOzol2KiETBXUsn0dMX5OfrvXWyXcIG9576dv7rrf3cWlnOvPLYu5akiETerHE5LK4YyxOrawgGh7ccQjQkZHA75/jaim2MSU3iq9fNjHY5IhJFd104iZrmTt6oboh2KWFLyOD+7dZjvLWnkfuXzaQgS2fjiSSy684fR2FWGo+/ezDapYQt4YK7qb2Hf3h+G7PH53BngqwbLSJnlprs4/bFoYtr76lvi3Y5YUmo4HbO8bfPbaG1y8+3b51HclJCNV9EzuDuiyrITE3mX1/aFe1SwpJQyfXshlp+t62O+5fNYNa4xL1Igoi8X2FWGp+/Yiord9SxZl9TtMsZVMIE9+66Nr62YhuLK8by2UunRLscEYkxn7l4MuNz0/mXF3fE/AyThAjuxvYePvPYOjJSk3jwtvlaj0RE/kBGahL3L5vJ5sMn+NWm2miXc1ZxH9zd/gB/8vh6Gtt7+K+7K5mQlxHtkkQkRn18QSkLJ+bxv5/fxr6G9miXc0ZxHdwdPX388U+rWH/wOA/cMp8PlelEGxE5M5/PeOiOhaQkGX/65Aa6emNz5cC4De7jHb3c+ega3tnbxL9/ah7Xa51tEQlDaV4GD962gF11bfzNL98jEIPj3XEZ3BtqjnPz995m+9FWvn/nQj55QVm0SxIRD7l8RhH3XzuD5zcd4YvLN8Tcmt1xdc3Jbn+A772+l+++todxOek8+dklLKoYG+2yRMSDvnjVdNJTkvjnF3bQ1LGWh+9YQHF2erTLAsIMbjO7DvhPIAl41Dn3rxGtaog6e/t4au0hfvDGXhraevj4glK+dtP55KSnRLs0EfGwz146heKcdP7ymc1c9e9v8OWrp3PPxRWkRPnkvUGD28ySgO8C1wKHgXVmtsI5tz3SxZ1Nc0cv6w408+KWo7yyvY7O3gAXTing4dsXsGRKQTRLE5E4cuO8CcwtzeWffr2Nb764g0ff2sfNC0r52IJSZpZkR+UCLOH0uBcDe5xz+wDM7GngJmDEg3vL4RP4g0ECQUcg6AgGHX1BR0uXn+b2Ho6e6GZ/YwfV9e3sb+wA
IDcjhZvmT+ATC8uo1LCIiETA5MJMfvzpxby+q54nVh/k0Tf388M39pGbkcKCiXlMLcqiLD+D8vwxXDO7JOL1hBPcpcDpq4wfBpZEophP/fAduv3BMz6fmuRjYsEYphdncUtlORdMymd+eR6pyXF5jFVEYswVM4u5YmYxje09rNpRz4aa42w61MLqfU10+4MUZafFTHCHxczuA+7r/7bdzCKyWks18GokdvyHCgHvXUX07NQmb1CbhuDOSOx0cAO25yBgfz/sfU4Kd8NwgrsWKD/t+7L+x97HOfcI8Ei4LxzrzKzKOVcZ7TpGktrkDWpT7It2e8IZY1gHTDezyWaWCtwGrIhsWSIiciaD9ridc31m9kXgd4SmA/7IObct4pWJiMiAwhrjds69CLwY4VpiTdwM+5xGbfIGtSn2RbU95lzsnYcvIiJnpnl0IiIek/DBbWbXmdkuM9tjZn8zwPNpZvaz/ufXmFnF6Fc5NGG06Stmtt3M3jOzV80s7GlI0TJYm07b7hNm5swspmcwhNMeM7ul/33aZmbLR7vGoQrj926imb1mZhv7f/duiEadQ2FmPzKzejPbeobnzcy+09/m98xs4agU5pxL2Buhg617gSlAKrAZmP2Bbf4U+EH//duAn0W77hFo05XAmP77n4+HNvVvlw38HlgNVEa77nN8j6YDG4H8/u+Lo133CLTpEeDz/fdnAweiXXcY7boMWAhsPcPzNwAvAQYsBdaMRl2J3uM+dTq/c64XOHk6/+luAn7Sf/8XwNUWjcUJwjdom5xzrznnOvu/XU1obn4sC+d9AvgG8C2gezSLG4Zw2vPHwHedc8cBnHP1o1zjUIXTJgecvEp3LnBkFOsbFufc74Hms2xyE/BTF7IayDOziC/+n+jBPdDp/KVn2sY51wecAGJ5Fatw2nS6ewn1GGLZoG3q/4ha7px7YTQLG6Zw3qMZwAwze9vMVvev0BnLwmnT14C7zOwwoVlqXxqd0iJqqH9vIyKu1uOWoTGzu4BK4PJo13IuzMwHfBu4J8qljKRkQsMlVxD6RPR7M5vrnGuJalXn5nbgMefcf5jZhcDjZjbHOXfmBYpkQIne4w7ndP5T25hZMqGPeE2jUt3whLVEgZldA/wdcKNzrmd6MTP7AAAC6ElEQVSUahuuwdqUDcwBXjezA4TGGlfE8AHKcN6jw8AK55zfObcf2E0oyGNVOG26F3gGwDn3LpBOaM0PLwvr722kJXpwh3M6/wrg7v77nwRWuf6jEjFq0DaZ2QLgh4RCO9bHTmGQNjnnTjjnCp1zFc65CkLj9jc656qiU+6gwvm9+xWh3jZmVkho6GTfaBY5ROG0qQa4GsDMziMU3A2jWuXIWwH8z/7ZJUuBE865oxF/1WgftY32jdBR4d2Ejoj/Xf9j/0ToDx9Cv1w/B/YAa4Ep0a55BNq0EqgDNvXfVkS75nNt0we2fZ0YnlUS5ntkhIZ/tgNbgNuiXfMItGk28DahGSebgGXRrjmMNj0FHAX8hD4F3Qt8Dvjcae/Td/vbvGW0fu905qSIiMck+lCJiIjnKLhFRDxGwS0i4jEKbhERj1Fwi4h4jIJbRMRjFNwinDorVsQTFNziWWaWaWYvmNlmM9tqZrea2SIze6f/sbVmlm1m6Wb2YzPb0r8W9JX9P3+Pma0ws1XAq/2P/ZWZretfW/nrUW2gyBmolyFedh1wxDn3RwBmlktoDetbnXPrzCwH6AK+DDjn3FwzmwW8bGYz+vexEPiQc67ZzJYRWg9kMaEz4laY2WUutLSnSMxQj1u8bAtwrZl9y8wuBSYCR51z6wCcc60utBTvJcAT/Y/tBA4SWvsD4BXn3Mn1lpf13zYCG4BZxPbCTpKg1OMWz3LO7e5fh/sG4J+BVcPYTcdp9w34P865H45EfSKRoh63eJaZTQA6nXNPAP8GLAHGm9mi/uez+w86vgnc2f/YDEI9810D7PJ3wGfMLKt/21IzK458S0SGRj1u8bK5wL+Z
WZDQ6m2fJ9RrfsjMMgiNb18DfA/4vpltAfqAe5xzPR+8Ap1z7uX+5Ubf7X+uHbgL8MLSt5JAtDqgiIjHaKhERMRjFNwiIh6j4BYR8RgFt4iIxyi4RUQ8RsEtIuIxCm4REY9RcIuIeMz/BzAkcNRbQk8YAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the distribution of `score`, which must already exist in the kernel (it is not\n",
    "# defined in this cell).\n",
    "# NOTE(review): distplot is deprecated since seaborn 0.11 (histplot/displot replace it);\n",
    "# check the installed seaborn version before rerunning.\n",
    "sns.distplot(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "重合率69.325000%\n"
     ]
    }
   ],
   "source": [
    "# Load the 'tag' column of the two valid_tag.csv files (predict_tag vs. predict_tag_new).\n",
    "valid_tag = pd.read_csv('../predict_tag/valid_tag.csv')['tag']\n",
    "valid_tag_new = pd.read_csv('../predict_tag_new/valid_tag.csv')['tag']\n",
    "# Percentage of rows on which both files carry the same tag. The mean of the boolean\n",
    "# mask divides by the actual number of rows instead of a hard-coded 20000, so the rate\n",
    "# stays correct if the validation set changes size.\n",
    "sim_rate = (valid_tag == valid_tag_new).mean()*100\n",
    "print('重合率%f%%'%(sim_rate))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 预测unlabel的tag score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_unlabel():\n",
    "    \"\"\"Load the unlabeled data and return its feature matrix.\n",
    "\n",
    "    The frame stored in '../semi_super/x_unlabel.lz4' is read with joblib, its\n",
    "    'tag' column is dropped and every missing value is replaced by -1.\n",
    "\n",
    "    Returns:\n",
    "        The ``.values`` array of the remaining columns.\n",
    "    \"\"\"\n",
    "    # NOTE(review): joblib.load unpickles the file, so it should only be pointed\n",
    "    # at trusted artifacts.\n",
    "    features = (\n",
    "        joblib.load('../semi_super/x_unlabel.lz4')\n",
    "        .drop(columns=['tag'])\n",
    "        .fillna(-1)\n",
    "    )\n",
    "    return features.values\n",
    "\n",
    "x_unlabel_arr = load_unlabel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "unlabel_save_path = './unlabel_preds'\n",
    "model_list = ['XGB', 'RFC', 'LGB']\n",
    "# An integer skiprows also skips the header line, so the column name is lost and the\n",
    "# first line that is kept is consumed as the header; only the values of the first\n",
    "# column are used below, so the missing name is harmless.\n",
    "# NOTE(review): 33465 is presumably tied to the number of labeled rows - confirm.\n",
    "unlabel_id = pd.read_csv(\n",
    "    '../../preprocess_data_new/train_ax_nodup.csv',\n",
    "    usecols=[0],\n",
    "    skiprows=33465,\n",
    ").values.ravel()\n",
    "\n",
    "predict_score(unlabel_save_path, model_list, unlabel_id, x_unlabel_arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "def combine_score(pred_dir,model_list,unlabel_pred_name,uid):\n",
    "    pred_path = os.path.join(pred_dir,'{}.csv'.format(model_list[0]))\n",
    "    score = pd.read_csv(pred_path)['score']\n",
    "    for model in model_list[1:]:\n",
    "        pred_path = os.path.join(pred_dir,'{}.csv'.format(model))\n",
    "        score += pd.read_csv(pred_path).score\n",
    "        \n",
    "    score = score/len(model_list)\n",
    "    pred = pd.DataFrame()\n",
    "    pred['id'] = uid\n",
    "    pred['score'] = score.values\n",
    "    pred.to_csv('./{}.csv'.format(unlabel_pred_name),index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Blend the per-model predictions found in ./unlabel_preds into ./unlabel_tag_score.csv.\n",
    "unlabel_pred_dir = './unlabel_preds'\n",
    "unlabel_pred_name = 'unlabel_tag_score'\n",
    "combine_score(unlabel_pred_dir, model_list, unlabel_pred_name, unlabel_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
